CREATE TABLE matching_cpc 
select
   c.patno as patno,
   count(distinct c.class) as c 
from
   uspto_class c 
   inner join
      uspto_meta m 
      on c.patno = m.patno 
where
   m.flag = 1 
   AND c.type = 'cpc' 
group by
   c.patno 
order by
   c.patno asc;
CREATE INDEX i1 
ON matching_cpc (patno);
create table matching_cites 
select
   c.citing,
   c.cited,
   IF(a1.name = a2.name 
   AND a1.name <> '' 
   and a1.name is not NULL, 1, 0) self,
   1 AS note,
   DATEDIFF(m2.grantdate, m1.grantdate) diff 
from
   uspto_cites c 
   left join
      uspto_assignee a1 
      on c.citing = a1.patno 
   left join
      uspto_assignee a2 
      on c.cited = a2.patno 
   inner join
      uspto_meta m1 
      on c.cited = m1.patno 
   inner join
      uspto_meta m2 
      on c.citing = m2.patno 
where
   c.country = 'US' 
   and m1.flag = 1 
   and m2.flag = 1;
CREATE TABLE matching_cites_forward 
select
   cited as patno,
   count(distinct citing) as forward_cites,
   count(distinct IF(self = 1, citing, NULL)) as forward_self_cites 
from
   matching_cites 
where
   diff <= 1825 
group by
   cited;
CREATE TABLE matching_cites_backward 
select
   citing as patno,
   count(distinct cited) as backward_cites,
   avg(diff) as diff_avg,
   stddev(diff) as diff_stddev 
from
   matching_cites 
where
   diff > 0 
group by
   citing;
# ALTER TABLE matching_cites ADD note INT NOT NULL DEFAULT 1;
create table matching_inventor 
select
   patno,
   count(*) AS count,
   CASE
      WHEN
         count(*) = 1 
      THEN
         1 
      WHEN
         count(*) = 2 
         OR count(*) = 3 
      THEN
         2 
      WHEN
         count(*) >= 4 
      THEN
         4 
   END
   AS bucket 
from
   uspto_inventor 
where
   length(patno) < 9 
group by
   patno;
# CREATE INDEX index_name 
ON table_name (column1, column2, ...);
CREATE INDEX i1 
ON matching_cites (citing);
CREATE INDEX i2 
ON matching_cites (cited);
CREATE INDEX i2 
ON matching_inventor (patno);
CREATE INDEX i1 
ON matching_cites_forward (patno);
CREATE INDEX i1 
ON matching_cites_backward (patno);
select
   count(*) 
from
   matching_cites;
select
   count(*) 
from
   matching_inventor;
select
   count(*) 
from
   matching_cpc;
describe matching_cites;
describe matching_inventor;
describe matching_cpc;
DROP TABLE IF EXISTS matching_2010_treated;
DROP TABLE IF EXISTS matching_2010_treated_complement;
DROP TABLE IF EXISTS matching_2010_control;
DROP TABLE IF EXISTS matching_2010_partial_1;
DROP TABLE IF EXISTS matching_2010;
CREATE TABLE matching_2010_treated 
select
   patno 
from
   govtz 
WHERE
   USG = 1 
   and ASG_CORP = 1 
   AND grantyear = 2010 
   AND country = 'US';
CREATE TABLE matching_2010_treated_complement 
select
   patno 
from
   govtz 
WHERE
   USG = 0 
   and ASG_CORP = 1 
   AND grantyear = 2010 
   AND country = 'US';
CREATE TABLE matching_2010_control 
select
   patno 
from
   govtz 
WHERE
   USG = 0 
   and ASG_CORP = 1 
   AND grantyear BETWEEN 2010 - 1 AND 2010 + 1 
   AND country = 'US';
# CREATE INDEX index_name 
ON table_name (column1, column2, ...);
CREATE INDEX i1 
ON matching_2010_treated (patno);
CREATE INDEX i1 
ON matching_2010_control (patno);
select
   count(*) 
from
   matching_2010_treated;
select
   count(*) 
from
   matching_2010_control;
# CREATE TABLE matching_2010_partial_1 
select
   t.patno as patno_treated,
   c.patno as patno_control,
   c1.class as cpc 
from
   matching_2010_treated t 
   join
      matching_2010_control c 
      on t.patno <> c.patno 
   inner join
      uspto_class c1 
      on c1.patno = t.patno 
   inner join
      uspto_class c2 
      on c2.patno = c.patno 
   inner join
      uspto_meta m1 
      on m1.patno = t.patno 
   inner join
      uspto_meta m2 
      on m2.patno = c.patno 
   inner join
      matching_inventor i1 
      on t.patno = i1.patno 
   inner join
      matching_inventor i2 
      on c.patno = i2.patno 
WHERE
   m1.grantdate <= '2010-02-28' 
   AND DATEDIFF(m1.grantdate, m2.grantdate) BETWEEN - 120 AND 120 
   AND c1.type = 'cpc' 
   and c2.type = 'cpc' 
   and c1.class = c2.class 
   and i1.bucket = i2.bucket 
group by
   patno_treated,
   patno_control,
   cpc;
# CREATE TABLE matching_2010 
select
   t.patno as patno_treated,
   c.patno as patno_control,
   c1.class as cpc 
from
   matching_2010_treated t 
   join
      matching_2010_control c 
      on t.patno <> c.patno 
   inner join
      uspto_class c1 
      on c1.patno = t.patno 
   inner join
      uspto_class c2 
      on c2.patno = c.patno 
   inner join
      uspto_meta m1 
      on m1.patno = t.patno 
   inner join
      uspto_meta m2 
      on m2.patno = c.patno 
   inner join
      matching_inventor i1 
      on t.patno = i1.patno 
   inner join
      matching_inventor i2 
      on c.patno = i2.patno 
WHERE
   m1.grantdate <= '2010-12-31' 
   AND DATEDIFF(m1.grantdate, m2.grantdate) BETWEEN - 365 AND 365 
   AND c1.type = 'cpc' 
   and c2.type = 'cpc' 
   and c1.class = c2.class 
   and 
   (
      i1.bucket = i2.bucket 
      OR 1 = 1
   )
group by
   patno_treated,
   patno_control,
   cpc;
CREATE TABLE matching_2010 
select
   t.patno as patno_treated,
   c.patno as patno_control,
   c1.class as cpc 
from
   matching_2010_treated t 
   join
      matching_2010_control c 
      on t.patno <> c.patno 
   inner join
      uspto_class c1 
      on c1.patno = t.patno 
   inner join
      uspto_class c2 
      on c2.patno = c.patno 
   inner join
      uspto_meta m1 
      on m1.patno = t.patno 
   inner join
      uspto_meta m2 
      on m2.patno = c.patno 
   inner join
      matching_inventor i1 
      on t.patno = i1.patno 
   inner join
      matching_inventor i2 
      on c.patno = i2.patno 
WHERE
   m1.grantdate <= '2010-12-31' 
   AND DATEDIFF(m1.grantdate, m2.grantdate) BETWEEN - 365 AND 365 
   AND c1.type = 'cpc' 
   and c2.type = 'cpc' 
   and c1.class = c2.class 
   and 
   (
      i1.bucket = i2.bucket 
      OR 1 = 0
   )
group by
   patno_treated,
   patno_control,
   cpc;
CREATE INDEX i1 
ON matching_2010 (patno_treated);
CREATE INDEX i2 
ON matching_2010 (patno_control);
CREATE INDEX i3 
ON matching_2010 (cpc);
DROP TABLE IF EXISTS matching_temp0;
DROP TABLE IF EXISTS matching_temp1;
DROP TABLE IF EXISTS matching_temp2;
DROP TABLE IF EXISTS matching_temp4;
DROP TABLE IF EXISTS matching_temp5;
DROP TABLE IF EXISTS matching_temp6;
DROP TABLE IF EXISTS matching_temp8;
DROP TABLE IF EXISTS matching_temp9;
CREATE TABLE matching_temp9 
select
   patno,
   count(*) as cc 
from
   novel 
where
   ngram = '-1' 
group by
   patno;
CREATE INDEX i2 
ON matching_temp9 (patno);
create table matching_temp0 
select
   patno_treated,
   patno_control,
   count(*)c 
from
   matching_2010 
group by
   patno_treated,
   patno_control;
# create table matching_temp1 
select
   patno_treated,
   max(c) m 
from
   matching_temp0 
group by
   patno_treated;
# create table matching_temp2 
select
   t0.patno_treated as patno_treated,
   t0.patno_control as patno_matched,
   t0.c as c 
from
   matching_temp0 t0 
   inner join
      matching_temp1 t1 
      on t0.patno_treated = t1.patno_treated 
      AND t0.c = t1.m;
# CREATE INDEX i1 
ON matching_temp2 (patno_treated);
# CREATE INDEX i2 
ON matching_temp2 (patno_matched);
create table matching_temp4 
select
   m.patno_treated as patno_treated,
   m.patno_control as patno_control,
   cc1.c as cpc_treated,
   cc2.c as cpc_control,
   m.c as cpc_overlapped,
   m.c / cc1.c / cc2.c as matching_score 
from
   matching_temp0 m 
   inner join
      matching_cpc cc1 
      on m.patno_treated = cc1.patno 
   inner join
      matching_cpc cc2 
      on m.patno_control = cc2.patno;
create table matching_temp5 
select
   patno_treated,
   max(matching_score) m 
from
   matching_temp4 
group by
   patno_treated;
CREATE INDEX i1 
ON matching_temp5 (patno_treated);
create table matching_temp6 
select
   t0.patno_treated as patno_treated,
   t0.patno_control as patno_matched,
   t0.cpc_overlapped,
   t0.matching_score as c 
from
   matching_temp4 t0 
   inner join
      matching_temp5 t1 
      on t0.patno_treated = t1.patno_treated 
      AND t0.matching_score = t1.m;
CREATE INDEX i1 
ON matching_temp6 (patno_treated);
CREATE INDEX i2 
ON matching_temp6 (patno_matched);
# 
select
   mt.patno_treated,
   mt.patno_matched,
   cc1.c,
   cc2.c,
   mt.c,
   count(distinct c1.citing),
   count(distinct c2.citing) 
from
   matching_temp2 mt 
   inner join
      matching_cites c1 
      on mt.patno_treated = c1.cited 
   inner join
      matching_cites c2 
      on mt.patno_matched = c2.cited 
   inner join
      matching_cpc cc1 
      on mt.patno_treated = cc1.patno 
   join
      matching_cpc cc2 
      on mt.patno_matched = cc2.patno 
WHERE
   c1.diff < 1825 
   and c2.diff < 1825 
   and mt.c > 5 
   AND c1.self + c2.self = 2 
group by
   mt.patno_treated,
   mt.patno_matched limit 100;
# 
select
   mt.patno_treated,
   mt.patno_matched,
   cc1.c,
   cc2.c,
   mt.c,
   count(distinct IF(c1.self = 1, c1.citing, NULL)),
   count(distinct c1.citing),
   count(distinct IF(c2.self = 1, c2.citing, NULL)),
   count(distinct c2.citing) 
from
   matching_temp2 mt 
   inner join
      matching_cites c1 
      on mt.patno_treated = c1.cited 
   inner join
      matching_cites c2 
      on mt.patno_matched = c2.cited 
   inner join
      matching_cpc cc1 
      on mt.patno_treated = cc1.patno 
   join
      matching_cpc cc2 
      on mt.patno_matched = cc2.patno 
WHERE
   c1.diff < 1825 
   and c2.diff < 1825 
   and mt.c > 5 
group by
   mt.patno_treated,
   mt.patno_matched limit 10;
# 
select
   mt.patno_treated,
   mt.patno_matched,
   cc1.c,
   cc2.c,
   mt.c,
   count(distinct IF(c1.self = 1, c1.citing, NULL)),
   count(distinct c1.citing),
   count(distinct IF(c2.self = 1, c2.citing, NULL)),
   count(distinct c2.citing),
   count(distinct c5.cited),
   avg(c5.diff),
   stddev(c5.diff),
   count(distinct c6.cited),
   avg(c6.diff),
   stddev(c6.diff) 
from
   matching_temp2 mt 
   inner join
      matching_cites c1 
      on mt.patno_treated = c1.cited 
   inner join
      matching_cites c2 
      on mt.patno_matched = c2.cited 
   inner join
      matching_cpc cc1 
      on mt.patno_treated = cc1.patno 
   join
      matching_cpc cc2 
      on mt.patno_matched = cc2.patno 
   inner join
      matching_cites c5 
      on mt.patno_treated = c5.citing 
   inner join
      matching_cites c6 
      on mt.patno_matched = c6.citing 
WHERE
   c1.diff < 1825 
   and c2.diff < 1825 
   and mt.c > 4 
group by
   mt.patno_treated,
   mt.patno_matched;
# 
select
   mt.patno_treated,
   mt.patno_matched,
   cc1.c,
   cc2.c,
   mt.c,
   c1.forward_cites,
   c1.forward_self_cites,
   c2.forward_cites,
   c2.forward_self_cites,
   c5.backward_cites,
   c5.diff_avg,
   c5.diff_stddev,
   c6.backward_cites,
   c6.diff_avg,
   c6.diff_stddev 
from
   matching_temp2 mt 
   inner join
      matching_cites_forward c1 
      on mt.patno_treated = c1.patno 
   inner join
      matching_cites_forward c2 
      on mt.patno_matched = c2.patno 
   inner join
      matching_cpc cc1 
      on mt.patno_treated = cc1.patno 
   join
      matching_cpc cc2 
      on mt.patno_matched = cc2.patno 
   inner join
      matching_cites_backward c5 
      on mt.patno_treated = c5.patno 
   inner join
      matching_cites_backward c6 
      on mt.patno_matched = c6.patno 
WHERE
   mt.c > 1 
group by
   mt.patno_treated,
   mt.patno_matched;
# 
select
   mt.patno_treated,
   mt.patno_matched,
   cc1.c,
   cc2.c,
   mt.cpc_overlapped,
   mt.c,
   c1.forward_cites,
   c1.forward_self_cites,
   c2.forward_cites,
   c2.forward_self_cites,
   c5.backward_cites,
   c5.diff_avg,
   c5.diff_stddev,
   c6.backward_cites,
   c6.diff_avg,
   c6.diff_stddev 
from
   matching_temp6 mt 
   inner join
      matching_cites_forward c1 
      on mt.patno_treated = c1.patno 
   inner join
      matching_cites_forward c2 
      on mt.patno_matched = c2.patno 
   inner join
      matching_cpc cc1 
      on mt.patno_treated = cc1.patno 
   join
      matching_cpc cc2 
      on mt.patno_matched = cc2.patno 
   inner join
      matching_cites_backward c5 
      on mt.patno_treated = c5.patno 
   inner join
      matching_cites_backward c6 
      on mt.patno_matched = c6.patno 
WHERE
   mt.c > 0 
group by
   mt.patno_treated,
   mt.patno_matched;
CREATE TABLE matching_temp8 
select
   IF(length(ff.matched) > 1 
   AND 
   (
      ff.matched = 'DIRECT' 
      OR ff.matched = 'asg'
   )
, 'DIRECT', 'CITING') as FLAG,
   mt.patno_treated,
   mt.patno_matched,
   cc1.c as treated_cpc_count,
   cc2.c as control_cpc_count,
   mt.cpc_overlapped,
   mt.c,
   c1.forward_cites treated_forward,
   c1.forward_self_cites treated_forward_self,
   c2.forward_cites control_forward,
   c2.forward_self_cites control_forward_self,
   c5.backward_cites treated_backward,
   c5.diff_avg treated_avg,
   c5.diff_stddev treated_stddev,
   c6.backward_cites control_backward,
   c6.diff_avg control_avg,
   c6.diff_stddev control_stddev 
from
   matching_temp6 mt 
   inner join
      matching_cites_forward c1 
      on mt.patno_treated = c1.patno 
   inner join
      matching_cites_forward c2 
      on mt.patno_matched = c2.patno 
   inner join
      matching_cpc cc1 
      on mt.patno_treated = cc1.patno 
   join
      matching_cpc cc2 
      on mt.patno_matched = cc2.patno 
   inner join
      matching_cites_backward c5 
      on mt.patno_treated = c5.patno 
   inner join
      matching_cites_backward c6 
      on mt.patno_matched = c6.patno 
   left join
      govtz ff 
      on mt.patno_treated = ff.patno 
WHERE
   mt.c > 0 
group by
   mt.patno_treated,
   mt.patno_matched;
# 
select
   t8.*,
   IF(t9t.patno IS NOT NULL, t9t.cc, 0) AS novel_treated,
   IF(t9c.patno IS NOT NULL, t9c.cc, 0) AS novel_control 
from
   matching_temp8 t8 
   left join
      matching_temp9 t9t 
      on t8.patno_treated = t9t.patno 
   left join
      matching_temp9 t9c 
      on t8.patno_matched = t9c.patno;
# 
select
   t8.*,
   IF(t9t.patno IS NOT NULL, t9t.cc, 0) AS novel_treated,
   IF(t9c.patno IS NOT NULL, t9c.cc, 0) AS novel_control,
   IF(t8.patno_treated IN 
   (
      select
         patno 
      from
         uspto_maint 
      where
         match(maintdesc) against('expired') 
         and DATEDIFF(maintdate, grantdate) < 365*5 
         and uptodate = 1
   )
, 0, 1) AS expired_4th_year_treated,
   IF(t8.patno_matched IN 
   (
      select
         patno 
      from
         uspto_maint 
      where
         match(maintdesc) against('expired') 
         and DATEDIFF(maintdate, grantdate) < 365*5 
         and uptodate = 1
   )
, 0, 1) AS expired_4th_year_control 
from
   matching_temp8 t8 
   left join
      matching_temp9 t9t 
      on t8.patno_treated = t9t.patno 
   left join
      matching_temp9 t9c 
      on t8.patno_matched = t9c.patno;
SELECT
   t8.*,
   IF(t9t.patno IS NOT NULL, t9t.cc, 0) as novel_treated,
   IF(t9c.patno IS NOT NULL, t9c.cc, 0) AS novel_control,
   IF(t8.patno_treated IN 
   (
      SELECT
         patno 
      FROM
         uspto_maint 
      WHERE
         match(maintdesc) against('expired') 
         AND datediff(maintdate, grantdate) < 365*5 
         AND uptodate = 1 
   )
, 1, 0) AS expired_4th_year_treated,
   IF(t8.patno_treated IN 
   (
      SELECT
         patno 
      FROM
         uspto_maint 
      WHERE
         match(maintdesc) against('expired') 
         AND datediff(maintdate, grantdate) BETWEEN 365*6 AND 365*9 
         AND uptodate = 1 
   )
, 1, 0) AS expired_8th_year_treated,
   IF(t8.patno_treated IN 
   (
      SELECT
         patno 
      FROM
         uspto_maint 
      WHERE
         match(maintdesc) against('expired') 
         AND datediff(maintdate, grantdate) BETWEEN 365*10 AND 365*14 
         AND uptodate = 1 
   )
, 1, 0) AS expired_12th_year_treated,
   IF(t8.patno_matched IN 
   (
      SELECT
         patno 
      FROM
         uspto_maint 
      WHERE
         match(maintdesc) against('expired') 
         AND datediff(maintdate, grantdate) < 365*5 
         AND uptodate = 1 
   )
, 1, 0) AS expired_4th_year_control,
   IF(t8.patno_matched IN 
   (
      SELECT
         patno 
      FROM
         uspto_maint 
      WHERE
         match(maintdesc) against('expired') 
         AND datediff(maintdate, grantdate) BETWEEN 365*6 AND 365*9 
         AND uptodate = 1 
   )
, 1, 0) AS expired_8th_year_control,
   IF(t8.patno_matched IN 
   (
      SELECT
         patno 
      FROM
         uspto_maint 
      WHERE
         match(maintdesc) against('expired') 
         AND datediff(maintdate, grantdate) BETWEEN 365*10 AND 365*14 
         AND uptodate = 1 
   )
, 1, 0) AS expired_12th_year_control,
   COUNT(DISTINCT ocet.id) blocking_total_treated,
   COUNT(DISTINCT ocet.appno) blocking_number_of_application_treated,
   COUNT(DISTINCT ocec.id) blocking_total_control,
   COUNT(DISTINCT ocec.appno) blocking_number_of_applicadtion_control,
   CASE
      WHEN
         zt.CPC_A = 1 
      THEN
         'A' 
      WHEN
         zt.CPC_B = 1 
      THEN
         'B' 
      WHEN
         zt.CPC_C = 1 
      THEN
         'C' 
      WHEN
         zt.CPC_D = 1 
      THEN
         'D' 
      WHEN
         zt.CPC_E = 1 
      THEN
         'E' 
      WHEN
         zt.CPC_F = 1 
      THEN
         'F' 
      WHEN
         zt.CPC_G = 1 
      THEN
         'G' 
      WHEN
         zt.CPC_H = 1 
      THEN
         'H' 
      WHEN
         zt.CPC_UNKNOWN = 1 
      THEN
         'Y' 
      ELSE
         'Y' 
   END
   AS cpc_treated, 
   CASE
      WHEN
         zc.CPC_A = 1 
      THEN
         'A' 
      WHEN
         zc.CPC_B = 1 
      THEN
         'B' 
      WHEN
         zc.CPC_C = 1 
      THEN
         'C' 
      WHEN
         zc.CPC_D = 1 
      THEN
         'D' 
      WHEN
         zc.CPC_E = 1 
      THEN
         'E' 
      WHEN
         zc.CPC_F = 1 
      THEN
         'F' 
      WHEN
         zc.CPC_G = 1 
      THEN
         'G' 
      WHEN
         zc.CPC_H = 1 
      THEN
         'H' 
      WHEN
         zc.CPC_UNKNOWN = 1 
      THEN
         'Y' 
      ELSE
         'Y' 
   END
   AS cpc_control, 1 
FROM
   matching_temp8 t8 
   LEFT JOIN
      matching_temp9 t9t 
      ON t8.patno_treated = t9t.patno 
   LEFT JOIN
      matching_temp9 t9c 
      ON t8.patno_matched = t9c.patno 
   LEFT JOIN
      uspto_oce_citation ocet 
      ON t8.patno_treated = ocet.priorart 
      AND ocet.f892 = 1 
   LEFT JOIN
      uspto_oce_citation ocec 
      ON t8.patno_matched = ocec.priorart 
      AND ocec.f892 = 1 
   LEFT JOIN
      govtz zt 
      ON t8.patno_treated = zt.patno 
   LEFT JOIN
      govtz zc 
      ON t8.patno_matched = zc.patno 
GROUP BY
   t8.patno_treated, t8.patno_matched ;


